//attempt to replicate dahl results

//assemble the person-level file: every extract is keyed on serial pernum
use "$data/ACS/usa_00008.dta", clear

//extracts adding fips codes (00009), weeks worked (00010), bpls (00014), relate values (00015)
foreach extract in usa_00009 usa_00010 usa_00014 usa_00015 {
	merge 1:1 serial pernum using "$data/ACS/`extract'", nogen
}

//number-of-children file: only records found on both sides are kept
merge 1:1 serial pernum using "$data/ACS/census_nchild", keep(match) nogen

//extracts adding metro status (00044) and family interrelationship variables (00047)
foreach extract in usa_00044 usa_00047 {
	merge 1:1 serial pernum using "$data/ACS/`extract'", nogen
}

save "$temp/census_readin", replace


use "$temp/census_readin", clear

//household-level flags: set to 1 for every member of a household (serial)
//in which at least one person carries one of the listed relate codes

//sibling or other relative (relate 7, 8 or 10)
bysort serial: egen sibling = max(inlist(relate, 7, 8, 10))

//roommate (relate 11 or 12)
bysort serial: egen roommate = max(inlist(relate, 11, 12))

//living alone: the largest person number in the household is 1
bysort serial: egen maxnum = max(pernum)
generate live_alone = maxnum == 1

//indicator for working spouse
//build a lookup in which each person's empstat is filed under the person number
//stored in sploc, so merging it back on (serial, pernum) attaches it to that person
preserve
keep serial sploc empstat
rename sploc pernum
drop if pernum == 0 //presumably sploc==0 means no spouse in the household -- confirm against codebook
generate spouse_work = empstat == 1
tempfile spousework
save `spousework'
restore

//unmatched lookup rows are discarded; people without a match get spouse_work missing
merge 1:m serial pernum using `spousework', keep(1 3) nogen

//basic sample restrictions: sex code 1, ages 25 through 54, race code 1
keep if sex == 1 & inrange(age, 25, 54) & race == 1
//gen white_asian = ((race==1 & hispan==0) | race==4 | race==5 | race==6)

//drop school code 2, the incwage code 999999, and gq above 2 (no group quarters)
drop if school == 2 | incwage == 999999 | gq > 2

//natives (bpl<=56), excluding DC (code 11) as residence or birthplace,
//and excluding negligible or indeterminate education (educ below 3)
keep if bpl <= 56 & statefip != 11 & bpl != 11 & educ >= 3

//didn't move too much: keep people with migplac5==0, or for whom at least two of
//(current state, birth state, place 5 years ago) coincide
keep if migplac5 == 0 | statefip == bpl | statefip == migplac5 | bpl == migplac5

//variable definitions
gen migrant = (statefip!=bpl) //lives in a state other than the state of birth
//gen metro = (metarea>0)
//recode the original metro variable into a 0/1 flag (1 when the original code exceeds 1)
ren metro temp
gen metro = (temp>1)
drop temp

//annual hours = usual weekly hours * weeks worked
gen hours = uhrswork * wkswork1

//deflate wage income with the PCE deflator (file stores it on a 100-based scale)
merge m:1 year using "$data/GDP/gdp_pce_deflator", nogen keep(match)
replace deflator = deflator/100
replace incwage = incwage/deflator //deflation
//NOTE(review): if 19.152 is the 1968 value of the same 100-based deflator, converting
//base-year dollars to 1968 dollars would multiply by 19.152/100, not 100/19.152 -- confirm
//against the deflator file before changing; left as originally written
gen incwage_1968 = incwage * (100 / 19.152)

//hours/wages restrictions
//gen hours = wkswork1*uhrswork
drop if age>=36 & (hours<520 | incwage_1968<1500) //adequate work for older heads
drop if age<36 & (hours<260 | incwage_1968<1000) //adequate work for younger heads
drop if hours>5280 //also removes missing hours, since missing compares greater than any number

//winsorize income at the 99th percentile
su incwage, d
//guard against missing: in Stata missing > any number, so without the guard
//missing incomes would be overwritten with the 99th percentile
replace incwage = `r(p99)' if incwage>`r(p99)' & !missing(incwage)

//hourly wage
gen wage = incwage/hours

//years of schooling from educd; any educd value not listed below stays at 8
generate yrs_sch = 8
replace yrs_sch = 9 if educd == 30
replace yrs_sch = 10 if educd == 40
replace yrs_sch = 11 if inlist(educd, 50, 61)
replace yrs_sch = 12 if educd == 62
replace yrs_sch = 13 if educd == 71
replace yrs_sch = 14 if inlist(educd, 82, 83)
replace yrs_sch = 16 if educd == 101
replace yrs_sch = 18 if educd == 114
replace yrs_sch = 19 if educd == 115
replace yrs_sch = 22 if educd == 116

//potential experience: age minus schooling minus 6; negative values are dropped
generate exp = age - yrs_sch - 6
drop if exp < 0

//mutually exclusive education dummies
generate lths = yrs_sch < 12
generate hs = yrs_sch == 12
generate somecoll = (yrs_sch < 16 & yrs_sch > 12)
generate coll = yrs_sch == 16
generate adv = yrs_sch > 16

//single categorical version: 1 lths, 2 hs, 3 somecoll, 4 coll, 5 adv
//(the dummies are mutually exclusive, so at most one term is switched on)
generate educ_cat = 1 + hs + 2*somecoll + 3*coll + 4*adv

//restrict to high school and college graduates
//keep if educ_cat==1 | educ_cat==2
keep if inlist(educ_cat, 2, 4)

//dummies for marriage, children and divorce
generate married = inlist(marst, 1, 2)
generate child_lt5 = nchlt5 > 0
generate child_518 = nchild > nchlt5 //more children than children under 5
generate divorced = marst == 4
generate child_any = nchild > 0

//dummy for living with extended family: own children, a parent pointer (poploc/momloc),
//relate code 9, or a household flagged by the sibling indicator
generate live_extend = (child_any | poploc > 0 | momloc > 0 | relate == 9 | sibling)

//living arrangement: 3 roommate household, else 2 extended family, else 1 (live alone)
generate livetype = cond(roommate, 3, cond(live_extend, 2, 1))

//no matched spouse record means no working spouse
replace spouse_work = 0 if missing(spouse_work)
save "$temp/census_2000_dahl_cleaned", replace


//summary stats: All, California, Florida, Illinois, Kansas, New York, Texas (statefip 6, 12, 17, 20, 36, 48). These all check out; Table 1 replicated

*****construct cells and try to replicate Table 2

**************************first handle stayers
use "$temp/census_2000_dahl_cleaned", clear

//types of individuals: married and unmarried people are grouped on different characteristics
egen cell1 = group(bpl educ_cat spouse_work child_lt5 child_518) if married
egen cell2 = group(bpl educ_cat divorced livetype) if !married
//shift the unmarried ids by a fixed offset so they do not overlap the married ids
//(relies on the married ids never exceeding 2028)
replace cell2 = cell2 + 2028
//rowtotal treats the missing id as 0, so each person ends up with the one id that applies
egen cell = rowtotal(cell*)
drop cell1 cell2

//cell sizes; cells with fewer than 10 people are discarded
generate uniqid = _n
egen ncell = count(uniqid), by(cell)
drop if ncell < 10 //kills ~10k obs

//proportion of people in each cell who stayed in their birth state
generate stay = !migrant
preserve
collapse (mean) stay, by(cell)
save "$temp/staying_probabilities_2000", replace
restore

//swap the individual indicator for the cell-level staying probability, then keep stayers only
drop stay
merge m:1 cell using "$temp/staying_probabilities_2000", keep(match) nogen
keep if !migrant
rename stay corrfunc_stay
save "$temp/census_2000_dahl_stayers", replace

//replicate first part of table 2 -- looks good!
//drop if ncell<10 //kills ~10k obs
//one record per cell is enough: the summarized variables are constant within a cell
duplicates drop cell, force
foreach grp in lths hs somecoll coll adv {
	su corrfunc if `grp', d
}


**************************now handle movers
//build the "little" cells: birth state x current state x education x family type,
//and record how many people fall in each one
use "$temp/census_2000_dahl_cleaned", clear

//married people are grouped on child_any, unmarried people on live_extend
egen cell1 = group(bpl statefip educ_cat child_any ) if married 
egen cell2 = group(bpl statefip educ_cat live_extend ) if !married
//fixed offset keeps unmarried ids apart from married ids (relies on married ids not exceeding 14025)
//the same construction is repeated later in this file before merging on cell -- keep them in sync
replace cell2 = cell2 + 14025
//rowtotal treats the missing id as 0, so each person gets the one id that applies
egen cell = rowtotal(cell*)
drop cell1 cell2
gen uniqid = _n
//ncell = number of people in the little cell
egen ncell = count(uniqid), by(cell)
//small cells are NOT dropped here (the restriction below is deliberately disabled)
//drop if ncell<10 //kills ~10k obs. Don't do this for now.
//keep one record per cell
duplicates drop cell, force
save "$temp/little_cells", replace



//get proportion of people in each master cell who wind up in given state (smaller cell)
//build the "big" (master) cells: birth state x education x family type, ignoring current state.
//their sizes are the denominators for the share of each master cell found in a given state
use "$temp/census_2000_dahl_cleaned", clear

//married people are grouped on child_any, unmarried people on live_extend
//(no cells are dropped for size in this step)
egen cell1 = group(bpl educ_cat child_any ) if married 
egen cell2 = group(bpl educ_cat live_extend ) if !married
//fixed offset keeps unmarried ids apart from married ids (relies on married ids not exceeding 510)
replace cell2 = cell2 + 510
egen cell = rowtotal(cell*)
drop cell1 cell2
gen uniqid = _n
//ncell = number of people in the master cell
egen ncell = count(uniqid), by(cell)
//one record per master cell
duplicates drop cell, force
keep cell bpl educ_cat child_any live_extend married ncell 
//the id itself is not needed: later merges use the defining characteristics instead
drop cell
ren ncell ncell_master
save "$temp/big_cells", replace

//split by marital status because the two groups are merged on different keys
preserve
keep if married
save "$temp/big_cells_married", replace
restore

keep if !married
save "$temp/big_cells_nonmarried", replace


//get to it
use "$temp/little_cells", clear

//attach the master-cell size to every little cell; married and unmarried cells
//are handled separately because they are keyed on different characteristics
preserve
keep if married
merge m:1 bpl educ_cat child_any using "$temp/big_cells_married", keep(match) nogen
save "$temp/merged_cells_married", replace
restore

keep if !married
merge m:1 bpl educ_cat live_extend using "$temp/big_cells_nonmarried", keep(match) nogen
append using "$temp/merged_cells_married"

//share of the master (birth-state) cell that lives in this current state
generate fracmove = ncell/ncell_master

//checks out!! -- summaries over cells whose current state differs from the birth state,
//overall and by education dummy
local mover_cells "bpl!=statefip"
su fracmove if `mover_cells'
foreach grp in lths hs somecoll coll adv {
	su fracmove if `mover_cells' & `grp'
}

keep cell bpl educ_cat live_extend child_any married fracmove statefip
save "$temp/mig_cell_probs_firstbest", replace

//retention probabilities: the cells where current state equals birth state,
//again stored separately by marital status
keep if bpl == statefip

preserve
keep if married
save "$temp/mig_cell_probs_retention_married", replace
restore

keep if !married
save "$temp/mig_cell_probs_retention_nonmarried", replace

//now actually get the probabilities into the data
//attach the cell probabilities to individual movers, then add the stayers back in
use "$temp/census_2000_dahl_cleaned", clear

//rebuild the little-cell ids exactly as they were built for "$temp/little_cells"
//(same grouping variables, same 14025 offset) so that the merge on cell lines up
egen cell1 = group(bpl statefip educ_cat child_any ) if married 
egen cell2 = group(bpl statefip educ_cat live_extend ) if !married
replace cell2 = cell2 + 14025
egen cell = rowtotal(cell*)
drop cell1 cell2
//movers only from here on; ids were assigned on the full sample above
keep if migrant

//get first-best probabilities: share of the birth-state cell living in the mover's current state
merge m:1 cell using "$temp/mig_cell_probs_firstbest", keep(match) nogen
ren fracmove prob_firstbest

//now grab retention probabilities: share of the birth-state cell that stayed in the birth state
//married and unmarried movers are matched on different keys, so process them separately
preserve
keep if married
merge m:1 bpl educ_cat child_any  using "$temp/mig_cell_probs_retention_married", keep(match) nogen
ren fracmove prob_retention
tempfile married
save `married'
restore

keep if !married
merge m:1 bpl educ_cat live_extend  using "$temp/mig_cell_probs_retention_nonmarried", keep(match) nogen
ren fracmove prob_retention
append using `married'
//mover cell ids are no longer needed
drop cell
//stayers carry corrfunc_stay plus their own cell/uniqid/ncell, which are dropped next
append using "$temp/census_2000_dahl_stayers"
drop cell uniqid ncell
save "$temp/census_2000_dahl_analysis", replace


**************now run the regressions!

use "$temp/census_2000_dahl_analysis", clear

//probability terms that do not apply to a person (missing after the append) are set to 0
foreach term of varlist prob_firstbest prob_retention corrfunc_stay {
	replace `term' = 0 if `term' == .
}

//dependent variable: log hourly wage
generate lwage = ln(wage)

//regressors: cubic in experience, metro, married, education with category 2 as the base
local controls `"c.exp##c.exp##c.exp metro married ib2.educ_cat "'
//local controls `"c.exp##c.exp##c.exp metro married ib1.educ_cat "'
//correction functions: quadratics in the staying, first-best and retention probabilities
local control_func `"c.corrfunc_stay##c.corrfunc_stay c.prob_firstbest##c.prob_firstbest c.prob_retention##c.prob_retention"'

//regressions: no correction function
//one regression per state; store the coefficient on educ_cat 4 and its standard error
levelsof statefip, clean local(fips)
foreach fip of local fips {
	display "`fip'"
	quietly regress lwage `controls' if statefip == `fip', vce(robust)
	local cp`fip'_raw = _b[4.educ_cat]
	local cp`fip'_raw_se = _se[4.educ_cat]
}

//regressions: with correction functions
//same specification plus the correction terms; only the coefficient is stored
levelsof statefip, clean local(fips)
foreach fip of local fips {
	display "`fip'"
	quietly regress lwage `controls' `control_func' if statefip == `fip', vce(robust)
	local cp`fip'_cf = _b[4.educ_cat]
}

//convert to data
//convert the stored estimates into a dataset with one row per state
levelsof statefip, clean local(fips)
//size the dataset from the actual list of states rather than assuming 50:
//fewer states would otherwise leave all-missing rows in the saved file,
//and more states would make the "in" references below fail
local nstates : word count `fips'
clear
set obs `nstates'
gen statefip = .
gen cp_raw = .
gen cp_raw_se = .
gen cp_correct = .
local counter = 0

//loop over fips and fill in skill prices (row k holds the k-th state in the list)
foreach fip in `fips'{
	local ++counter
	replace statefip = `fip' in `counter'
	replace cp_raw = `cp`fip'_raw' in `counter'
	replace cp_raw_se = `cp`fip'_raw_se' in `counter'
	replace cp_correct = `cp`fip'_cf' in `counter'
}
//replace cp_raw = exp(cp_raw)
//replace cp_correct = exp(cp_correct)

//compare raw and corrected estimates
corr cp*
su cp*
gen diff = cp_raw - cp_correct
//states where the raw estimate exceeds the corrected one by more than two raw standard errors
count if diff> 2 * cp_raw_se
su diff
//scatter of corrected against raw estimates with a 45-degree reference line
scatter cp_correct cp_raw || line cp_raw cp_raw , bgcolor(white) graphregion(color(white)) xtitle("Raw Estimates") ytitle("Corrected Estimates") legend(off)
graph export "$output/dahl_coll_2000.png", replace
graph close



//save the corrected estimates, exponentiated, by state
keep statefip cp_correct
replace cp_correct = exp(cp_correct)
save "$temp/coll_premia_2000", replace









//end of dofile